Distinct echo substrings [KMP, Sliding Window, Rabin-Karp]

Time: O(N^2+D); Space: O(R); hard

Return the number of distinct non-empty substrings of text that can be written as the concatenation of some string with itself (i.e. it can be written as a + a where a is some string).

Example 1:

Input: text = “abcabcabc”

Output: 3

Explanation:

  • The 3 substrings are “abcabc”, “bcabca” and “cabcab”.

Example 2:

Input: text = “leetcodeleetcode”

Output: 2

Explanation:

  • The 2 substrings are “ee” and “leetcodeleetcode”.

Constraints:

  • 1 <= len(text) <= 2000

  • text has only lowercase English letters.

Hints:

  1. Given a substring of the text, how to check if it can be written as the concatenation of a string with itself ?

  2. We can do that in linear time; a faster way is to use hashing.

  3. Try all substrings and use hashing to check them.

[1]:
class Solution1(object):
    """
    KMP failure function, run once per starting offset.

    Time:  O(n^2 + d), d is the total size of the (possibly repeated)
           substrings inserted into the result set
    Space: O(r), r is the size of the result substrings set
    """
    def distinctEchoSubstrings(self, text):
        """
        Count the distinct substrings of text that have the form a + a.

        :type text: str
        :rtype: int
        """
        echoes = set()

        def scan_suffix(start):
            # Build the failure table of text[start:] and record every
            # prefix of that suffix which is an echo.  Returns the period
            # of the whole suffix when that period divides its length,
            # otherwise infinity.
            size = len(text) - start
            fail = [-1] * size
            matched = -1
            for end in range(1, size):
                current = text[start + end]
                while matched > -1 and text[start + matched + 1] != current:
                    matched = fail[matched]
                if text[start + matched + 1] == current:
                    matched += 1
                fail[end] = matched

                border = matched + 1
                if not border:
                    continue
                length = end + 1
                period = length - border
                # The prefix is an echo when it is an even number of
                # repetitions of its period.
                if length % period == 0 and (length // period) % 2 == 0:
                    echoes.add(text[start:start + length])

            border = fail[-1] + 1
            period = size - border
            if border and size % period == 0:
                return period
            return float("inf")

        start, limit = 0, len(text) - 1
        # When the remaining suffix is fully periodic, starting offsets one
        # period or more further right only reproduce substrings already
        # seen, so the scan limit can be pulled in.
        # e.g. aaaaaaaaaaaaaaaaaaaaaaaaaaaaaabcdefabcdefabcdef
        while start < limit:
            limit = min(limit, start + scan_suffix(start))
            start += 1
        return len(echoes)
[2]:
# Check Solution1 against the two examples from the problem statement.
s = Solution1()
for text, expected in (("abcabcabc", 3), ("leetcodeleetcode", 2)):
    assert s.distinctEchoSubstrings(text) == expected
[3]:
class Solution2(object):
    """
    Sliding window over every candidate half-length.

    Time:  O(n^2 + d), d is the total size of the (possibly repeated)
           halves inserted into the result set
    Space: O(r), r is the size of the result substrings set
    """
    def distinctEchoSubstrings(self, text):
        """
        Count the distinct substrings of text that have the form a + a.

        :type text: str
        :rtype: int
        """
        n = len(text)
        halves = set()
        for half in range(1, n // 2 + 1):
            last_start = n - 2 * half

            # Number of positions k in the first window with
            # text[k] == text[k + half].
            agree = 0
            for k in range(half):
                if text[k] == text[k + half]:
                    agree += 1

            for start in range(last_start + 1):
                # All `half` positions agree, so the window is an echo.
                # Storing one half identifies the echo uniquely.
                if agree == half:
                    halves.add(text[start:start + half])
                if start == last_start:
                    break
                # Slide right: one pair enters the window, one pair leaves.
                if text[start + half] == text[start + 2 * half]:
                    agree += 1
                if text[start] == text[start + half]:
                    agree -= 1
        return len(halves)
[4]:
# Check Solution2 against the two examples from the problem statement.
s = Solution2()
for text, expected in (("abcabcabc", 3), ("leetcodeleetcode", 2)):
    assert s.distinctEchoSubstrings(text) == expected
[5]:
class Solution3(object):
    """
    Rabin-Karp: for every split point, grow a left half and a right half
    outward while maintaining a rolling hash of each.

    Hash hits are confirmed by comparing the two halves, and the halves
    themselves (not their hashes) are stored, so neither a spurious
    left/right match nor two different halves sharing a hash can corrupt
    the count.

    Time:  O(n^2 + d), d is the total size of the halves whose hashes match
    Space: O(r), r is the size of the result substrings set
    """
    def distinctEchoSubstrings(self, text):
        """
        Count the distinct substrings of text that have the form a + a.

        :type text: str
        :rtype: int
        """
        MOD = 10**9+7
        D = 27  # a-z map to digits 1..26; digit 0 is never produced
        result = set()
        for i in range(len(text)-1):
            # Split between i and i+1: left half is text[i-l+1:i+1],
            # right half is text[i+1:i+1+l].  Both hashes weight the
            # leftmost character of the half with D**0.
            left, right, pow_D = 0, 0, 1
            for l in range(1, min(i+2, len(text)-i)):
                left = (D*left + (ord(text[i-l+1]) - ord('a') + 1)) % MOD
                right = (pow_D*(ord(text[i+l]) - ord('a') + 1) + right) % MOD
                if left == right:
                    # Equal hashes are only a hint; compare the real text.
                    half = text[i+1:i+1+l]
                    if text[i-l+1:i+1] == half:
                        result.add(half)
                pow_D = (pow_D*D) % MOD
        return len(result)
[6]:
# Check Solution3 against the two examples from the problem statement.
s = Solution3()
for text, expected in (("abcabcabc", 3), ("leetcodeleetcode", 2)):
    assert s.distinctEchoSubstrings(text) == expected
[7]:
class Solution_TLE(object):
    """
    Rabin-Karp with a character-by-character confirmation of every hash hit.

    Time:  O(n^3 + d), d is the total size of the (possibly repeated)
           halves inserted into the result set
    Space: O(r), r is the size of the result substrings set
    """
    def distinctEchoSubstrings(self, text):
        """
        Count the distinct substrings of text that have the form a + a.

        :type text: str
        :rtype: int
        """
        MOD = 10**9+7
        D = 27  # a-z and ''

        def same_run(length, first, second):
            # True when the two length-`length` runs starting at `first`
            # and `second` are identical; stops at the first mismatch.
            return all(text[first + k] == text[second + k]
                       for k in range(length))

        n = len(text)
        offset = ord('a') - 1
        digits = [ord(ch) - offset for ch in text]
        found = set()
        for mid in range(n):
            # Left half ends at `mid`, right half begins at `mid + 1`.
            left_hash = right_hash = 0
            weight = 1
            for half in range(1, min(mid + 2, n - mid)):
                left_start = mid - half + 1
                left_hash = (left_hash * D + digits[left_start]) % MOD
                right_hash = (right_hash + weight * digits[mid + half]) % MOD
                if left_hash == right_hash and \
                        same_run(half, left_start, mid + 1):
                    found.add(text[mid + 1:mid + 1 + half])
                weight = (weight * D) % MOD
        return len(found)
[8]:
# Check Solution_TLE against the two examples from the problem statement.
s = Solution_TLE()
for text, expected in (("abcabcabc", 3), ("leetcodeleetcode", 2)):
    assert s.distinctEchoSubstrings(text) == expected